<!DOCTYPE HTML>
<html lang="zh-Hans">
    
    <head>
        <!-- Document metadata. The charset declaration comes first and is the
             only encoding declaration in the document. -->
        <meta charset="UTF-8">
        <meta http-equiv="X-UA-Compatible" content="IE=edge">
        <title>纯正则爬取智联的职位信息 | 爬虫</title>
        <meta name="description" content="用纯正则来爬取智联招聘的职位信息">
        <meta name="generator" content="GitBook 2.6.7">

        <!-- Mobile hints. Zooming is deliberately left enabled so readers can
             enlarge the code listing on small screens. -->
        <meta name="HandheldFriendly" content="true">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="apple-mobile-web-app-capable" content="yes">
        <meta name="apple-mobile-web-app-status-bar-style" content="black">
        <link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
        <link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">

        <!-- Stylesheets: base theme first, then plugin styles in their original order. -->
        <link rel="stylesheet" href="../gitbook/style.css">
        <link rel="stylesheet" href="../gitbook/plugins/gitbook-plugin-highlight/website.css">
        <link rel="stylesheet" href="../gitbook/plugins/gitbook-plugin-search/search.css">
        <link rel="stylesheet" href="../gitbook/plugins/gitbook-plugin-fontsettings/website.css">

        <!-- Sequential navigation: the chapters after and before this one. -->
        <link rel="next" href="../spider_projects/setting.html">
        <link rel="prev" href="../spider_projects/liepin.html">
    </head>
    <body>
        
        
    <div class="book"
        data-level="10.7"
        data-chapter-title="纯正则爬取智联的职位信息"
        data-filepath="spider_projects/zhilianRe.md"
        data-basepath=".."
        data-revision="Fri Oct 19 2018 08:44:08 GMT+0800 (中国标准时间)"
        data-innerlanguage="">
    

<div class="book-summary">
    <nav role="navigation">
        <ul class="summary">
            
            
            
            

            

            
    
        <li class="chapter " data-level="0" data-path="index.html">
            
                
                    <a href="../index.html">
                
                        <i class="fa fa-check"></i>
                        
                        序言
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="1" data-path="认识爬虫/introduceSpider.html">
            
                
                    <a href="../认识爬虫/introduceSpider.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.</b>
                        
                        认识爬虫
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="1.1" data-path="认识爬虫/http.html">
            
                
                    <a href="../认识爬虫/http.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.1.</b>
                        
                        HTTP
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="1.2" data-path="认识爬虫/Requests.html">
            
                
                    <a href="../认识爬虫/Requests.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.2.</b>
                        
                        requests
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="1.3" data-path="认识爬虫/初步爬虫.html">
            
                
                    <a href="../认识爬虫/初步爬虫.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>1.3.</b>
                        
                        初步爬虫小项目
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="2" data-path="正则表达式/正则表达式.html">
            
                
                    <a href="../正则表达式/正则表达式.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.</b>
                        
                        正则表达式
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="2.1" data-path="正则表达式/正则表达式练习.html">
            
                
                    <a href="../正则表达式/正则表达式练习.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.1.</b>
                        
                        正则表达式练习
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.2" data-path="正则表达式/正则表达式之后的第一个项目.html">
            
                
                    <a href="../正则表达式/正则表达式之后的第一个项目.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.2.</b>
                        
                        正则表达式之后的第一个项目
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.3" data-path="正则表达式/正则表达式常用表.html">
            
                
                    <a href="../正则表达式/正则表达式常用表.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.3.</b>
                        
                        正则表达式常用表
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.4" data-path="正则表达式/正则表达式重点1.html">
            
                
                    <a href="../正则表达式/正则表达式重点1.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.4.</b>
                        
                        正则表达式重点
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.5" data-path="正则表达式/正则表达式常问问题.html">
            
                
                    <a href="../正则表达式/正则表达式常问问题.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.5.</b>
                        
                        正则表达式常问问题
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="2.6" data-path="正则表达式/正则爬虫练习.html">
            
                
                    <a href="../正则表达式/正则爬虫练习.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>2.6.</b>
                        
                        正则爬虫项目练习
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="3" data-path="XPATH/xpath-in.html">
            
                
                    <a href="../XPATH/xpath-in.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.</b>
                        
                        XPATH
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="3.1" data-path="XPATH/Xpath.html">
            
                
                    <a href="../XPATH/Xpath.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.1.</b>
                        
                        XPATH认识
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.2" data-path="XPATH/Xpath的代码例子.html">
            
                
                    <a href="../XPATH/Xpath的代码例子.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.2.</b>
                        
                        XPATH的代码例子
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="3.3" data-path="XPATH/xpath的爬虫练习.html">
            
                
                    <a href="../XPATH/xpath的爬虫练习.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>3.3.</b>
                        
                        XPATH的爬虫练习
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="4" data-path="BeautifulSoup/BeautifulSoup.html">
            
                
                    <a href="../BeautifulSoup/BeautifulSoup.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.</b>
                        
                        BeautifulSoup
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="4.1" data-path="BeautifulSoup/常用的css选择器.html">
            
                
                    <a href="../BeautifulSoup/常用的css选择器.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.1.</b>
                        
                        常用的css选择器
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="4.2" data-path="BeautifulSoup/BeautifulSoup4的各种例子.html">
            
                
                    <a href="../BeautifulSoup/BeautifulSoup4的各种例子.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>4.2.</b>
                        
                        BeautifulSoup4的各种例子
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="5" data-path="代理/proxy.html">
            
                
                    <a href="../代理/proxy.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>5.</b>
                        
                        ip代理池项目
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="6" data-path="Selenium/Selenium.html">
            
                
                    <a href="../Selenium/Selenium.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>6.</b>
                        
                        Selenium
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="7" data-path="进程线程协程/introduce.html">
            
                
                    <a href="../进程线程协程/introduce.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>7.</b>
                        
                        进程线程协程
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="7.1" data-path="进程线程协程/gevent.html">
            
                
                    <a href="../进程线程协程/gevent.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>7.1.</b>
                        
                        gevent
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="7.2" data-path="进程线程协程/green_let.html">
            
                
                    <a href="../进程线程协程/green_let.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>7.2.</b>
                        
                        green_let
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="7.3" data-path="进程线程协程/yield.html">
            
                
                    <a href="../进程线程协程/yield.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>7.3.</b>
                        
                        yield
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="7.4" data-path="进程线程协程/multiprecessing.html">
            
                
                    <a href="../进程线程协程/multiprecessing.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>7.4.</b>
                        
                        multiprecessing
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="7.5" data-path="进程线程协程/threading.html">
            
                
                    <a href="../进程线程协程/threading.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>7.5.</b>
                        
                        threading
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="8" data-path="scrapy框架/scrapy.html">
            
                
                    <a href="../scrapy框架/scrapy.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>8.</b>
                        
                        Scrapy
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="8.1" data-path="scrapy框架/scrapy_setting.html">
            
                
                    <a href="../scrapy框架/scrapy_setting.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>8.1.</b>
                        
                        scrapy_setting
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="8.2" data-path="scrapy框架/模块作用.html">
            
                
                    <a href="../scrapy框架/模块作用.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>8.2.</b>
                        
                        模块作用
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="8.3" data-path="scrapy框架/19个中间件.html">
            
                
                    <a href="../scrapy框架/19个中间件.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>8.3.</b>
                        
                        19个中间件
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="9" data-path="scrapy-redis分布式/scrapy-redis.html">
            
                
                    <a href="../scrapy-redis分布式/scrapy-redis.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>9.</b>
                        
                        scrapy-redis分布式
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="9.1" data-path="scrapy-redis分布式/scrapy-redis的改造方法.html">
            
                
                    <a href="../scrapy-redis分布式/scrapy-redis的改造方法.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>9.1.</b>
                        
                        scrapy-redis的改造方法
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="10" data-path="spider_projects/introduce.html">
            
                
                    <a href="../spider_projects/introduce.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.</b>
                        
                        spider_projects
                    </a>
            
            
            <ul class="articles">
                
    
        <li class="chapter " data-level="10.1" data-path="spider_projects/使用代理在普通爬虫脚本下.html">
            
                
                    <a href="../spider_projects/使用代理在普通爬虫脚本下.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.1.</b>
                        
                        使用代理在普通爬虫脚本下
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.2" data-path="spider_projects/xpath爬取前程无忧的数据.html">
            
                
                    <a href="../spider_projects/xpath爬取前程无忧的数据.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.2.</b>
                        
                        xpath爬取前程无忧的数据
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.3" data-path="spider_projects/存储数据库的脚本.html">
            
                
                    <a href="../spider_projects/存储数据库的脚本.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.3.</b>
                        
                        存储数据库的脚本
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.4" data-path="spider_projects/weibo.html">
            
                
                    <a href="../spider_projects/weibo.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.4.</b>
                        
                        爬取央视新闻微博
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.5" data-path="spider_projects/weibo2.html">
            
                
                    <a href="../spider_projects/weibo2.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.5.</b>
                        
                        爬取明星的微博
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.6" data-path="spider_projects/liepin.html">
            
                
                    <a href="../spider_projects/liepin.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.6.</b>
                        
                        爬取猎聘的职位信息Crawl
                    </a>
            
            
        </li>
    
        <li class="chapter active" data-level="10.7" data-path="spider_projects/zhilianRe.html">
            <!-- Entry for the page being viewed. aria-current exposes the
                 "current page" state to assistive technology; the "active"
                 class alone only conveys it visually. -->
            <a href="../spider_projects/zhilianRe.html" aria-current="page">
                <!-- Decorative check mark icon; hidden from screen readers. -->
                <i class="fa fa-check" aria-hidden="true"></i>
                <b>10.7.</b>
                纯正则爬取智联的职位信息
            </a>
        </li>
    
        <li class="chapter " data-level="10.8" data-path="spider_projects/setting.html">
            
                
                    <a href="../spider_projects/setting.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.8.</b>
                        
                        scrapy中的setting
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.9" data-path="spider_projects/items.html">
            
                
                    <a href="../spider_projects/items.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.9.</b>
                        
                        scrapy中的items
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.10" data-path="spider_projects/pipeline.html">
            
                
                    <a href="../spider_projects/pipeline.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.10.</b>
                        
                        scrapy中的pipeline
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.11" data-path="spider_projects/middlewares.html">
            
                
                    <a href="../spider_projects/middlewares.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.11.</b>
                        
                        scrapy中的middlewares
                    </a>
            
            
        </li>
    
        <li class="chapter " data-level="10.12" data-path="spider_projects/框架中的代码运行脚本.html">
            
                
                    <a href="../spider_projects/框架中的代码运行脚本.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>10.12.</b>
                        
                        框架中的代码运行脚本
                    </a>
            
            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="11" data-path="爬虫的面试题/面试题1-10.html">
            
                
                    <a href="../爬虫的面试题/面试题1-10.html">
                
                        <i class="fa fa-check"></i>
                        
                            <b>11.</b>
                        
                        爬虫面试题
                    </a>
            
            
        </li>
    


            
            <li class="divider"></li>
            <li>
                <!-- External credit link. target="_blank" opens a fresh tab on every
                     click; rel="noopener noreferrer" prevents the opened page from
                     reaching back into this one through window.opener. -->
                <a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
                    Published with GitBook
                </a>
            </li>
            
        </ul>
    </nav>
</div>

    <div class="book-body">
        <div class="body-inner">
            <div class="book-header" role="navigation">
                <!-- Actions Left: empty in the static markup. -->

                <!-- Title: book name linking back to the book root. The element
                     stays an h1 because the theme stylesheet may target it. -->
                <h1>
                    <!-- Decorative loading spinner; hidden from screen readers. -->
                    <i class="fa fa-circle-o-notch fa-spin" aria-hidden="true"></i>
                    <a href="../">爬虫</a>
                </h1>
            </div>

            <div class="page-wrapper" tabindex="-1" role="main">
                <div class="page-inner">
                
                
                    <section class="normal" id="section-">
                    
                        <h1 id="用纯正则来爬取智联招聘的职位信息">用纯正则来爬取智联招聘的职位信息</h1>
<hr>
<pre><code class="lang-python"><span class="hljs-comment"># -*- coding: utf-8 -*-</span>
<span class="hljs-keyword">import</span> scrapy
<span class="hljs-keyword">import</span> json
<span class="hljs-keyword">import</span> re,time
<span class="hljs-keyword">from</span> w3lib.html <span class="hljs-keyword">import</span> remove_tags
<span class="hljs-keyword">from</span> projects.items <span class="hljs-keyword">import</span> LiepinItem

<span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">ZhilianSpider</span><span class="hljs-params">(scrapy.Spider)</span>:</span>
    name = <span class="hljs-string">&apos;zhilian&apos;</span>
    allowed_domains = [<span class="hljs-string">&apos;zhilian.com&apos;</span>,<span class="hljs-string">&apos;zhaopin.com&apos;</span>]
    <span class="hljs-comment"># start_urls = [&apos;http://zhilian.com/&apos;]</span>
    <span class="hljs-comment">#&#x7ED9;spider&#x4E00;&#x4E2A;&#x8D77;&#x59CB;&#x7684;url</span>
    <span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">start_requests</span><span class="hljs-params">(self)</span>:</span>
        base_url = <span class="hljs-string">&apos;https://fe-api.zhaopin.com/c/i/sou?start={}&amp;pageSize=60&amp;cityId=530&amp;workExperience=-1&amp;education=-1&amp;companyType=-1&amp;employmentType=-1&amp;jobWelfareTag=-1&amp;kw=Python&amp;kt=4&amp;lastUrlQuery=%7B%22p%22:2,%22pageSize%22:%2260%22,%22jl%22:%22530%22,%22kw%22:%22Python%22,%22kt%22:%224%22%7D&apos;</span>
        <span class="hljs-comment">#&#x7ED9;&#x8FD9;&#x4E2A;url &#x6570;&#x636E;&#xFF0C;&#x505A;&#x4E00;&#x4E2A;&#x62FC;&#x63A5;&#xFF0C;&#x9875;&#x7801;&#x6570;&#x7684;&#x53D8;&#x5316;</span>
        <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> range(<span class="hljs-number">1</span>,<span class="hljs-number">500</span>):
            print(<span class="hljs-string">&apos;--------------------------------&#x7B2C;&apos;</span>,i,<span class="hljs-string">&quot;&#x9875;------------------------------------------------&quot;</span>)
            url = base_url.format(i*<span class="hljs-number">60</span>)
            req = scrapy.Request(url=url, callback=self.parse)
            <span class="hljs-comment">#&#x52A0;&#x4E00;&#x4E2A;&#x8BF7;&#x6C42;&#x5934;</span>
            req.headers[<span class="hljs-string">&apos;User-Agent&apos;</span>] = <span class="hljs-string">&apos;Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36&apos;</span>
            <span class="hljs-keyword">yield</span>  req

    <span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">parse</span><span class="hljs-params">(self, response)</span>:</span>
        <span class="hljs-comment"># &#x5C06;&#x83B7;&#x53D6;&#x5230;&#x7684;&#x5185;&#x5BB9;&#x505A;&#x4E00;&#x4E2A;&#x7F16;&#x7801;&#xFF0C;&#x5F97;&#x5230;json&#x683C;&#x5F0F;</span>
        res_dict = json.loads(response.body.decode(<span class="hljs-string">&apos;utf-8&apos;</span>))
        <span class="hljs-comment">#&#x904D;&#x5386;&#x83B7;&#x53D6;&#x5230;&#x7684;&#x5185;&#x5BB9;&#x7684;results</span>
        <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> res_dict[<span class="hljs-string">&apos;data&apos;</span>][<span class="hljs-string">&apos;results&apos;</span>]:
            <span class="hljs-comment">#&#x5728;&#x8FD9;&#x4E2A;&#x91CC;&#x9762;&#x83B7;&#x53D6;&#x6211;&#x4EEC;&#x60F3;&#x8981;&#x5F97;&#x5230;&#x5DE5;&#x4F5C;&#x8BE6;&#x60C5;&#x9875;&#x7684;url</span>
            href = i[<span class="hljs-string">&apos;positionURL&apos;</span>]
            <span class="hljs-comment">#&#x8BF7;&#x6C42;&#x8FD9;&#x4E2A;url &#x83B7;&#x53D6;&#x8BE6;&#x60C5;&#x9875;</span>
            req = scrapy.Request(url=href, callback=self.parse_detailed)
            <span class="hljs-keyword">yield</span> req

    <span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">parse_detailed</span><span class="hljs-params">(self,response)</span>:</span>
        <span class="hljs-comment"># with open(&apos;zhilian.html&apos;,&apos;wb&apos;) as f:</span>
        <span class="hljs-comment">#     f.write(response.body)</span>
        <span class="hljs-comment">#&#x6765;&#x6355;&#x6349;&#x5F02;&#x5E38;&#x7684;</span>
        <span class="hljs-keyword">try</span>:
            <span class="hljs-comment">#&#x5C06;html &#x7F16;&#x7801; utf-8 &#x6765;&#x83B7;&#x53D6;&#x4FE1;&#x606F;</span>
            html = response.body.decode(<span class="hljs-string">&apos;utf-8&apos;</span>)
            <span class="hljs-comment">#&#x6B63;&#x5219;&#x5339;&#x914D; &#x83B7;&#x53D6;&#x6807;&#x9898;</span>
            title_re = re.compile(<span class="hljs-string">r&apos;&lt;h1&gt;(.*?)&lt;/h1&gt;&apos;</span>,re.S)
            title = re.search(title_re,html).group(<span class="hljs-number">1</span>)
            <span class="hljs-comment">#&#x6B63;&#x5219;&#x5339;&#x914D;&#x83B7;&#x53D6;&#x5DE5;&#x8D44;</span>
            salary = re.search(<span class="hljs-string">&apos;&lt;strong&gt;(.*?)&lt;a href=&quot;http://www.zhaopin.com/\w.*/&quot;.*?&gt;&apos;</span>,html).group(<span class="hljs-number">1</span>)
            salary = salary.split(<span class="hljs-string">&apos;&amp;&apos;</span>)[<span class="hljs-number">0</span>]
            <span class="hljs-comment">#&#x56E0;&#x4E3A;&#x8FD9;&#x91CC;&#x7684; &#x7ECF;&#x9A8C; &#x5B66;&#x5386;&#x8981;&#x6C42;&#xFF0C;&#x5DE5;&#x4F5C;&#x5730;&#x70B9;&#xFF0C;&#x62DB;&#x8058;&#x4EBA;&#x6570;&#xFF0C;</span>
            <span class="hljs-comment"># &#x4EE5;&#x53CA;&#x53D1;&#x5E03;&#x65F6;&#x95F4;&#x6CA1;&#x6709;&#x4E13;&#x95E8;&#x7684;&#x7C7B;&#x6216;&#x8005;id&#xFF0C;&#x6240;&#x4EE5;&#x9700;&#x8981;&#x83B7;&#x53D6;&#x6240;&#x6709;&#x7684;&#x4E00;&#x8D77;&#x83B7;&#x5F97;&#xFF0C;&#x7136;&#x540E;&#x6765;&#x5207;&#x5206;</span>
            <span class="hljs-comment">#&#x53D6;&#x51FA;&#x6240;&#x6709;&#x7684; ul &#x5185;&#x7684;&#x5185;&#x5BB9;&#xFF0C;&#x7136;&#x540E;&#x6765;&#x5207;&#x5206;&#x53D6;&#x503C;</span>
            ul_re = re.compile(<span class="hljs-string">&apos;&lt;ul class=&quot;terminal-ul clearfix&quot;&gt;(.*?)&lt;/ul&gt;&apos;</span>,re.S)
            ul = re.search(ul_re,html).group(<span class="hljs-number">1</span>)
            info_li = remove_tags(ul)
            info = info_li.split(<span class="hljs-string">&apos;\n&apos;</span>)
            <span class="hljs-comment"># print(info)</span>
            experince = <span class="hljs-string">&quot;&quot;</span>
            number = <span class="hljs-string">&quot;&quot;</span>
            education = <span class="hljs-string">&quot;&quot;</span>
            time_show = <span class="hljs-string">&quot;&quot;</span>
            position=<span class="hljs-string">&quot;&quot;</span>
            <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> info:
                <span class="hljs-keyword">if</span> i == <span class="hljs-string">&quot;&quot;</span>:
                    info.remove(i)
                <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;&#x5DE5;&#x4F5C;&#x5730;&#x70B9;&quot;</span> <span class="hljs-keyword">in</span> i:
                    i = i.split(<span class="hljs-string">&apos;&#xFF1A;&apos;</span>)
                    <span class="hljs-comment"># print(i)</span>
                    position += i[-<span class="hljs-number">1</span>]
                    <span class="hljs-comment"># print(position)</span>
                <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;&#x5DE5;&#x4F5C;&#x7ECF;&#x9A8C;&quot;</span> <span class="hljs-keyword">in</span> i:
                    i = i.split(<span class="hljs-string">&apos;&#xFF1A;&apos;</span>)
                    <span class="hljs-comment"># print(i)</span>
                    experince += i[-<span class="hljs-number">1</span>]
                    <span class="hljs-comment"># print(experience)</span>
                <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;&#x6700;&#x4F4E;&#x5B66;&#x5386;&quot;</span> <span class="hljs-keyword">in</span> i :
                    i=i.split(<span class="hljs-string">&apos;&#xFF1A;&apos;</span>)
                    education += i[-<span class="hljs-number">1</span>]
                    <span class="hljs-comment"># print(education)</span>
                <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;&#x62DB;&#x8058;&#x4EBA;&#x6570;&quot;</span> <span class="hljs-keyword">in</span> i :
                    i = i.split(<span class="hljs-string">&apos;&#xFF1A;&apos;</span>)
                    number += i[-<span class="hljs-number">1</span>]
                <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;&#x53D1;&#x5E03;&#x65E5;&#x671F;&quot;</span> <span class="hljs-keyword">in</span> i :
                    i = i.split(<span class="hljs-string">&apos;&#xFF1A;&apos;</span>)
                    time_show += i[-<span class="hljs-number">1</span>]
            <span class="hljs-comment">#&#x83B7;&#x53D6;&#x5DE5;&#x4F5C;&#x804C;&#x4F4D;&#x7684;&#x63CF;&#x8FF0;</span>
            description = re.search(<span class="hljs-string">r&apos;&lt;div class=&quot;tab-inner-cont&quot;&gt;(.*?)&lt;b&gt;&apos;</span>,html,re.S).group(<span class="hljs-number">1</span>)
            description = remove_tags(description)
            description = <span class="hljs-string">&apos;&apos;</span>.join(description)
            <span class="hljs-comment"># print(description)</span>
            <span class="hljs-comment">#&#x83B7;&#x53D6;&#x516C;&#x53F8;&#x540D;&#x79F0;</span>
            com_name_re = re.compile(<span class="hljs-string">&apos;target=&quot;_blank&quot;&gt;(.*?)&lt;img class=&quot;icon_vip&quot;&apos;</span>)
            com_name =re.search(com_name_re,html).group(<span class="hljs-number">1</span>)
            <span class="hljs-comment">#&#x722C;&#x866B;&#x7684;&#x6DFB;&#x52A0;&#x65F6;&#x95F4;</span>
            add_time = time.strftime(<span class="hljs-string">&quot;%Y/%m/%d %H:%M:%S&quot;</span>, time.localtime())
            <span class="hljs-comment">#&#x6765;&#x81EA;&#x4E8E;&#x54EA;&#x4E2A;&#x7F51;&#x7AD9;</span>
            from_web = <span class="hljs-string">&quot;&#x667A;&#x8054;&#x62DB;&#x8058; zhaopin.com&quot;</span>
            data = (title, salary, position, experince, education, number, time_show, description, com_name, add_time, from_web)
            <span class="hljs-comment"># print(data)</span>
            item = LiepinItem()
            item[<span class="hljs-string">&apos;title&apos;</span>] = title
            item[<span class="hljs-string">&apos;salary&apos;</span>] = salary
            item[<span class="hljs-string">&apos;position&apos;</span>] = position
            item[<span class="hljs-string">&apos;experince&apos;</span>] = experince
            item[<span class="hljs-string">&apos;education&apos;</span>] = education
            item[<span class="hljs-string">&apos;number&apos;</span>] = number
            item[<span class="hljs-string">&apos;time_show&apos;</span>] = time_show
            item[<span class="hljs-string">&apos;description&apos;</span>] = description
            item[<span class="hljs-string">&apos;com_name&apos;</span>] = com_name
            item[<span class="hljs-string">&apos;add_time&apos;</span>] = add_time
            item[<span class="hljs-string">&apos;from_web&apos;</span>] = from_web
            <span class="hljs-keyword">yield</span> item
        <span class="hljs-keyword">except</span>:
            <span class="hljs-keyword">pass</span>
</code></pre>

                    
                    </section>
                
                
                </div>
            </div>
        </div>

        
        <a href="../spider_projects/liepin.html" class="navigation navigation-prev " aria-label="Previous page: 爬取猎聘的职位信息Crawl"><i class="fa fa-angle-left"></i></a>
        
        
        <a href="../spider_projects/setting.html" class="navigation navigation-next " aria-label="Next page: scrapy中的setting"><i class="fa fa-angle-right"></i></a>
        
    </div>
</div>

        
<script src="../gitbook/app.js"></script>

    
    <script src="../gitbook/plugins/gitbook-plugin-search/lunr.min.js"></script>
    

    
    <script src="../gitbook/plugins/gitbook-plugin-search/search.js"></script>
    

    
    <script src="../gitbook/plugins/gitbook-plugin-sharing/buttons.js"></script>
    

    
    <script src="../gitbook/plugins/gitbook-plugin-fontsettings/buttons.js"></script>
    

<script>
// Load the "gitbook" module and call its start() with this page's
// per-plugin configuration object.
require(["gitbook"], function (book) {
    // Entry for the "highlight" plugin key: an empty object.
    var highlightOptions = {};

    // Entry for the "search" plugin key.
    var searchOptions = { "maxIndexSize": 1000000 };

    // Entry for the "sharing" plugin key: one boolean per named network,
    // plus the "all" list of network names.
    var sharingOptions = {
        "facebook": true,
        "twitter": true,
        "google": false,
        "weibo": false,
        "instapaper": false,
        "vk": false,
        "all": ["facebook", "google", "twitter", "weibo", "instapaper"]
    };

    // Entry for the "fontsettings" plugin key.
    var fontOptions = { "theme": "white", "family": "sans", "size": 2 };

    // Same keys, same order, same values as the original single-line literal.
    book.start({
        "highlight": highlightOptions,
        "search": searchOptions,
        "sharing": sharingOptions,
        "fontsettings": fontOptions
    });
});
</script>

        
    </body>
    
</html>
